#### #DictatorErdogan: How Social Media Bans Trigger Backlash ####

#### + Load Data ####

# Set Seed
# Fixes the RNG state so that the random steps further down (for example the
# jitter() call in the Figure 2 plot) are reproducible from a fresh session.
set.seed(21401)
# Load Libraries
# NOTE: the attach order decides which package wins when two of them export
# the same name, so keep this order unless masking has been checked.
library(Synth)
library(sandwich)
library(lmtest)
library(plm)
library(ggplot2)
library(ggrepel)
library(scales)
library(stargazer)
# Set Working Directory
# NOTE(review): this changes the working directory of the whole R session and
# assumes the replication data sits in the home directory; adjust the path if
# the .RData file lives elsewhere.
setwd("~")
# Load Data
# Restores the saved objects into the global environment. The script below
# reads `c`, `d`, `tweet`, `sof.march21.ts`, `sof.ts` and `d.tm` without ever
# creating them, so presumably all of them come from this file -- confirm.
load("./Miller_PolCom_Data_Final.RData")

#### + Figure 2: Hybrid Regimes Engage in Moderate Censorship ####

# Scatter of the Internet freedom score against the Polity score, one point
# per row of `c`. The x values are jittered so identical scores do not stack;
# the black line is geom_smooth() with its default settings.
p1 <- ggplot(data = c, mapping = aes(x = jitter(e_polity2), y = v2mecenefi)) +
  geom_smooth(color = "black", size = 0.3) +
  geom_point(alpha = 0.25) +
  labs(x = "Polity Score", y = "Internet Freedom Score") +
  theme_bw()
p1

#### + Table 1: Synthetic Control Characteristics ####

# Predictor columns of `d` that every synthetic control below is matched on.
# The plain-language names are the row labels used in the balance table.
covs <- c(
  "NY.GDP.PCAP.CD",      # GDP per capita
  "SL.TLF.ACTI.1524.ZS", # Youth Labor Rate
  "IT.CEL.SETS.P2",      # Mobile Subscriptions (per 100)
  "IC.LGL.CRED.XQ",      # Legal Rights Index
  "IT.NET.BBND.P2"       # Broadband Subscriptions (per 100)
)

### Synthetic controls for the three outcomes
# The media, Google and Tor analyses use one and the same specification and
# differ only in the outcome column, so the specification is written once.

# Fit a synthetic Turkey for a single outcome.
#   outcome: name of the dependent-variable column in `data`.
#   data:    panel handed to dataprep() as `foo` (defaults to `d`).
# Returns a list holding the dataprep() result (`prep`), the synth() fit
# (`fit`) and the synth.tab() summary tables (`tables`).
fit_synth <- function(outcome, data = d) {
  prep <- dataprep(
    foo = data,
    predictors = covs,
    predictors.op = "mean",
    dependent = outcome,
    unit.variable = "iso_code",
    time.variable = "time",
    treatment.identifier = 792, # Turkey ISO code
    # Donor pool chosen by position in unique(iso_code): entries 1-10, 12-16.
    # NOTE(review): presumably entry 11 is Turkey -- confirm before re-sorting
    # or extending `d`, because the selection is positional.
    controls.identifier = unique(data$iso_code)[c(1:10, 12:16)],
    # Predictors are averaged, and the fit is optimised, over periods 1-47.
    time.predictors.prior = 1:47,
    time.optimize.ssr = 1:47,
    unit.names.variable = "country_name",
    time.plot = 1:89
  )
  fit <- synth(prep)
  list(
    prep = prep,
    fit = fit,
    tables = synth.tab(dataprep.res = prep, synth.res = fit)
  )
}

### Media Mentions
sc.med <- fit_synth("media")
dataprep.med <- sc.med$prep
synth.med <- sc.med$fit
tables.med <- sc.med$tables

### Google Searches
sc.goog <- fit_synth("goog_avg")
dataprep.goog <- sc.goog$prep
synth.goog <- sc.goog$fit
tables.goog <- sc.goog$tables

### Tor Usage
sc.tor <- fit_synth("tor_clients")
dataprep.tor <- sc.tor$prep
synth.tor <- sc.tor$fit
tables.tor <- sc.tor$tables

### Generate Balance Table
# Stack the predictor-balance tables of the three fits in the order
# media, Google, Tor.
bal.table <- do.call(
  rbind,
  list(tables.med$tab.pred, tables.goog$tab.pred, tables.tor$tab.pred)
)
# The five predictor labels repeat once per outcome; the second and third
# copies carry one and two trailing spaces so that every row name is distinct.
# outer() fills column by column, which yields all labels with no suffix
# first, then with " ", then with "  ".
rownames(bal.table) <- as.vector(outer(
  c("GDP per capita",
    "Youth Labor Rate",
    "Mobile Subscriptions (per 100)",
    "Legal Rights Index",
    "Broadband Subscriptions (per 100)"),
  c("", " ", "  "),
  paste0
))
print(bal.table)

#### + Figure 3: Circumvention Tool Demand ####

### Media Mentions
# Actual vs. synthetic Turkey in long format: one row per series and day.
# Both series are kept numeric from start to finish; binding them next to a
# text label would turn them into strings that then have to be parsed back.
med.synthetic <- as.numeric(dataprep.med$Y0plot %*% synth.med$solution.w)
med.actual <- as.numeric(dataprep.med$Y1plot)
gap.med <- data.frame(
  effect = c(med.synthetic, med.actual),
  type = c(rep("Turkey (Synthetic)", length(med.synthetic)),
           rep("Turkey (Actual)", length(med.actual))),
  # The 89 plotted periods are re-centred so that period 47 becomes day 0.
  time = rep(-46:42, 2),
  stringsAsFactors = FALSE
)
# Plot
# - show.legend replaces the deprecated show_guide argument.
# - scale_linetype_manual(values = c(1, 4)) matches the Google and Tor panels,
#   so all three panels draw the two series with the same line types.
# - No colour scale is added because no colour aesthetic is mapped.
p1 <- ggplot(gap.med, aes(x = time, lty = type)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_line(aes(y = effect), alpha = 1, show.legend = FALSE) +
  theme(panel.background = element_rect(fill = "white", color = "gray"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "top", axis.title = element_text(size = 12),
        axis.text = element_text(size = 8),
        strip.text.x = element_text(size = 9),
        legend.text = element_text(size = 10),
        legend.key = element_rect(fill = "white"),
        legend.title = element_blank()) +
  scale_y_continuous(limits = c(-10, 200)) +
  xlab("Days Before and After Twitter Ban") +
  ylab("Number of Sentences") +
  scale_linetype_manual(values = c(1, 4)) +
  # Only the 30 days on either side of day 0 are shown.
  scale_x_continuous(limits = c(-30, 30),
                     breaks = seq(-30, 30, 10))
p1

### Google Searches
# Actual vs. synthetic Turkey in long format: one row per series and day.
# Both series are kept numeric from start to finish; binding them next to a
# text label would turn them into strings that then have to be parsed back.
goog.synthetic <- as.numeric(dataprep.goog$Y0plot %*% synth.goog$solution.w)
goog.actual <- as.numeric(dataprep.goog$Y1plot)
gap.goog <- data.frame(
  effect = c(goog.synthetic, goog.actual),
  type = c(rep("Turkey (Synthetic)", length(goog.synthetic)),
           rep("Turkey (Actual)", length(goog.actual))),
  # The 89 plotted periods are re-centred so that period 47 becomes day 0.
  time = rep(-46:42, 2),
  stringsAsFactors = FALSE
)
# Plot
# - show.legend replaces the deprecated show_guide argument.
# - No colour or fill scale/label is added because neither aesthetic is
#   mapped in this plot.
p2 <- ggplot(gap.goog, aes(x = time, lty = type)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_line(aes(y = effect), alpha = 1, show.legend = FALSE) +
  theme(panel.background = element_rect(fill = "white", color = "gray"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "top", axis.title = element_text(size = 12),
        axis.text = element_text(size = 8),
        strip.text.x = element_text(size = 9),
        legend.text = element_text(size = 10),
        legend.key = element_rect(fill = "white"),
        legend.title = element_blank()) +
  scale_y_continuous(limits = c(-5, 100)) +
  xlab("Days Before and After Twitter Ban") +
  ylab("Search Volume (Normalized)") +
  scale_linetype_manual(values = c(1, 4)) +
  # Only the 30 days on either side of day 0 are shown.
  scale_x_continuous(limits = c(-30, 30),
                     breaks = seq(-30, 30, 10))
p2

### Tor Usage
# Actual vs. synthetic Turkey in long format: one row per series and day.
# Both series are kept numeric from start to finish; binding them next to a
# text label would turn them into strings that then have to be parsed back.
tor.synthetic <- as.numeric(dataprep.tor$Y0plot %*% synth.tor$solution.w)
tor.actual <- as.numeric(dataprep.tor$Y1plot)
gap.tor <- data.frame(
  effect = c(tor.synthetic, tor.actual),
  type = c(rep("Turkey (Synthetic)", length(tor.synthetic)),
           rep("Turkey (Actual)", length(tor.actual))),
  # The 89 plotted periods are re-centred so that period 47 becomes day 0.
  time = rep(-46:42, 2),
  stringsAsFactors = FALSE
)
# Plot
# This panel keeps its line-type legend (placed at the bottom). No colour or
# fill scale/label is added because neither aesthetic is mapped in this plot.
p3 <- ggplot(gap.tor, aes(x = time, lty = type)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  geom_line(aes(y = effect), alpha = 1) +
  theme(panel.background = element_rect(fill = "white", color = "gray"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "bottom", axis.title = element_text(size = 12),
        axis.text = element_text(size = 8),
        strip.text.x = element_text(size = 9),
        legend.text = element_text(size = 12),
        legend.key = element_rect(fill = "white"),
        legend.title = element_blank()) +
  scale_y_continuous(limits = c(15000, 80000)) +
  xlab("Days Before and After Twitter Ban") +
  ylab("Tor Clients") +
  scale_linetype_manual(values = c(1, 4)) +
  # Only the 30 days on either side of day 0 are shown.
  scale_x_continuous(limits = c(-30, 30),
                     breaks = seq(-30, 30, 10))
p3

#### + Figure 4: Twitter Use after the Ban ####

### Twitter Frequency Daily
# Daily tweet counts with dashed vertical markers at the 5th and 19th entries
# of tweet$day.
# NOTE(review): presumably these two days bracket the ban -- confirm.
# The marker positions are passed as plain values instead of through aes():
# a constant inside aes() is recycled to every row of `tweet`, so each marker
# would be drawn once per row and the stacked copies would cancel alpha = .5.
p1 <- ggplot(tweet, aes(x = day, y = tweets)) +
  geom_line(color = "black", alpha = 1) +
  geom_vline(xintercept = as.numeric(tweet$day[c(5, 19)]),
             linetype = 2, alpha = .5) +
  xlab("Day") +
  ylab("Number of Tweets") +
  scale_y_continuous(labels = comma, limits = c(0, 500000)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
p1

### Unique Users Daily
# Daily user counts with dashed vertical markers at the 5th and 19th entries
# of tweet$day.
# NOTE(review): presumably these two days bracket the ban -- confirm.
# The marker positions are passed as plain values instead of through aes():
# a constant inside aes() is recycled to every row of `tweet`, so each marker
# would be drawn once per row and the stacked copies would cancel alpha = .5.
p2 <- ggplot(tweet, aes(x = day, y = users)) +
  geom_line(color = "black", alpha = 1) +
  geom_vline(xintercept = as.numeric(tweet$day[c(5, 19)]),
             linetype = 2, alpha = .5) +
  xlab("Day") +
  ylab("Twitter Users") +
  scale_y_continuous(labels = comma, limits = c(0, 160000)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
p2

#### + Figure 5: Sentiment Analysis Graph ####

### Graph showing change in sentiment at different levels of activity

# Coefficient test for a user fixed-effects model with group-clustered SEs.
#   model: a fitted plm object.
#   ids:   the sender_id column of the sample the model was fitted on.
# Returns the coeftest() table built from vcovHC(type = "HC1",
# cluster = "group"), additionally scaled by G / (G - 1), where G is the
# number of distinct users in `ids`.
# The scaling factor stays local to this function so that it no longer
# overwrites the dataset `c` that the Figure 2 plot reads.
cluster_coeftest <- function(model, ids) {
  n_users <- length(unique(ids))
  adj <- n_users / (n_users - 1)
  coeftest(model, adj * vcovHC(model, type = "HC1", cluster = "group"))
}

# Each sample below keeps the rows whose tweet_total<N> flag equals 1 and
# regresses sent.sum.sd on postban with sender_id fixed effects.

# All Tweeters (100 Tweets)
sof.march21.ts.all100 <- subset(sof.march21.ts, tweet_total100 == 1)
mod100 <- plm(sent.sum.sd ~ postban,
              data = sof.march21.ts.all100,
              index = c("sender_id"),
              model = "within")
mod100r <- cluster_coeftest(mod100, sof.march21.ts.all100$sender_id)
mod100r
# All Tweeters (75 Tweets)
sof.march21.ts.all75 <- subset(sof.march21.ts, tweet_total75 == 1)
mod75 <- plm(sent.sum.sd ~ postban,
             data = sof.march21.ts.all75,
             index = c("sender_id"),
             model = "within")
mod75r <- cluster_coeftest(mod75, sof.march21.ts.all75$sender_id)
mod75r
# All Tweeters (50 Tweets)
sof.march21.ts.all50 <- subset(sof.march21.ts, tweet_total50 == 1)
mod50 <- plm(sent.sum.sd ~ postban,
             data = sof.march21.ts.all50,
             index = c("sender_id"),
             model = "within")
mod50r <- cluster_coeftest(mod50, sof.march21.ts.all50$sender_id)
mod50r
# All Tweeters (25 Tweets)
sof.march21.ts.all25 <- subset(sof.march21.ts, tweet_total25 == 1)
mod25 <- plm(sent.sum.sd ~ postban,
             data = sof.march21.ts.all25,
             index = c("sender_id"),
             model = "within")
mod25r <- cluster_coeftest(mod25, sof.march21.ts.all25$sender_id)
mod25r
# All Tweeters (3 Tweets)
sof.march21.ts.all3 <- subset(sof.march21.ts, tweet_total3 == 1)
mod3 <- plm(sent.sum.sd ~ postban,
            data = sof.march21.ts.all3,
            index = c("sender_id"),
            model = "within")
mod3r <- cluster_coeftest(mod3, sof.march21.ts.all3$sender_id)
mod3r

# Create Dataframe
# One row per activity threshold: the coefficient of the matching
# fixed-effects model and the summed `total` column of its sample.
tweet_sent <- data.frame(
  Tweets = c(100, 75, 50, 25, 3),
  Coef = unlist(lapply(
    list(mod100, mod75, mod50, mod25, mod3),
    function(m) as.numeric(m$coefficients)
  )),
  Total = unlist(lapply(
    list(sof.march21.ts.all100, sof.march21.ts.all75, sof.march21.ts.all50,
         sof.march21.ts.all25, sof.march21.ts.all3),
    function(s) sum(s$total)
  ))
)
# Plot
# Point size encodes the tweet total; the dashed line marks a zero effect.
p <- ggplot(tweet_sent, aes(x = Tweets, y = Coef, size = Total)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  xlim(0, 100) +
  ylim(-0.17, 0.01) +
  scale_size_continuous(labels = comma) +
  labs(x = "User Tweet Volume", y = "Change in Polarity") +
  theme_bw() +
  theme(legend.position = "bottom")
p

# Table
# Text table of the five activity-threshold models (100/75/50/25/3 tweets).
# The column grouping and both checkmark rows span all five models; a
# four-column layout would leave the last column unlabelled and unticked.
stargazer(mod100r,mod75r,mod50r,mod25r,mod3r,type="text",
          column.labels   = c("Change in Polarity"),
          column.separate = c(5),
          column.sep.width = "5pt",
          label = "tab:sentiment",
          model.numbers=TRUE,
          digits = 2,
          dep.var.labels=c("Change in Polarity"),
          add.lines = list(c("Cluster-Robust SEs", rep("\\checkmark", 5)),
                           c("User Fixed Effects", rep("\\checkmark", 5)),
                           # Distinct users in each sample
                           c("Users", 
                             formatC(length(unique(sof.march21.ts.all100$sender_id)),format="d",big.mark=","),
                             formatC(length(unique(sof.march21.ts.all75$sender_id)),format="d",big.mark=","),
                             formatC(length(unique(sof.march21.ts.all50$sender_id)),format="d",big.mark=","),
                             formatC(length(unique(sof.march21.ts.all25$sender_id)),format="d",big.mark=","),
                             formatC(length(unique(sof.march21.ts.all3$sender_id)),format="d",big.mark=",")
                           ),
                           # Sum of the `total` column in each sample
                           c("Tweets", 
                             formatC(sum(sof.march21.ts.all100$total),format="d",big.mark=","),
                             formatC(sum(sof.march21.ts.all75$total),format="d",big.mark=","),
                             formatC(sum(sof.march21.ts.all50$total),format="d",big.mark=","),
                             formatC(sum(sof.march21.ts.all25$total),format="d",big.mark=","),
                             formatC(sum(sof.march21.ts.all3$total),format="d",big.mark=",")
                           ),
                           # Rows used by each fitted model
                           c("Observations", 
                             formatC(nobs(mod100),format="d",big.mark=","),
                             formatC(nobs(mod75),format="d",big.mark=","),
                             formatC(nobs(mod50),format="d",big.mark=","),
                             formatC(nobs(mod25),format="d",big.mark=","),
                             formatC(nobs(mod3),format="d",big.mark=",")
                           ) 
          ),
          title = "Sentiment on Twitter Shifts Negatively after Ban"
)

#### + Table 2: Sentiment Analysis ####

### Time-series regressions (sentiment)
# Every model below regresses its outcome on postban with sender_id fixed
# effects; the reported SEs come from vcovHC(type = "HC1", cluster = "group")
# scaled by G / (G - 1), where G is the number of distinct users in the
# sample. The scaling factors get their own names so that the dataset `c`
# read by the Figure 2 plot is not overwritten.

# Erdogan Tweeters (anytime)
sof.march21.ts.erdo <- subset(sof.march21.ts, erdo6_total1 == 1)
G.erdo <- length(unique(sof.march21.ts.erdo$sender_id))
adj.erdo <- G.erdo / (G.erdo - 1)
mod2 <- plm(sent.erdo.sum.sd6 ~ postban,
            data = sof.march21.ts.erdo,
            index = c("sender_id"),
            model = "within")
mod2r <- coeftest(mod2, adj.erdo * vcovHC(mod2, type = "HC1", cluster = "group"))
mod2r
# Erdogan Tweeters (after)
# Note that this sample is drawn from sof.ts, restricted to 15-21 March 2014.
sof.march21.ts.erdo.both <- subset(
  sof.ts,
  post.erdo.march21 == 1 &
    turk_time >= as.Date("2014-03-15") &
    turk_time <= as.Date("2014-03-21")
)
G.both <- length(unique(sof.march21.ts.erdo.both$sender_id))
adj.both <- G.both / (G.both - 1)
mod4 <- plm(sent.erdo.sum.sd6 ~ postban,
            data = sof.march21.ts.erdo.both,
            index = c("sender_id"),
            model = "within")
mod4r <- coeftest(mod4, adj.both * vcovHC(mod4, type = "HC1", cluster = "group"))
mod4r

### Time-series regressions (total tweets)
# Same two samples and scaling factors as above; only the outcome changes.
# Erdogan Tweeters (anytime)
mod5 <- plm(total.erdo.sd6 ~ postban,
            data = sof.march21.ts.erdo,
            index = c("sender_id"),
            model = "within")
mod5r <- coeftest(mod5, adj.erdo * vcovHC(mod5, type = "HC1", cluster = "group"))
mod5r
# Erdogan Tweeters (after)
mod6 <- plm(total.erdo.sd6 ~ postban,
            data = sof.march21.ts.erdo.both,
            index = c("sender_id"),
            model = "within")
mod6r <- coeftest(mod6, adj.both * vcovHC(mod6, type = "HC1", cluster = "group"))
mod6r

### Stargazer
# Text table of the four Table 2 models. Columns 1 and 3 use the "anytime"
# sample and columns 2 and 4 the "after" sample, so the user and tweet counts
# alternate between the two samples.
stargazer(
  mod2r, mod4r, mod5r, mod6r,
  type = "text",
  title = "Sentiment on Twitter Shifts Negatively after Ban",
  label = "tab:sentiment",
  dep.var.labels = c("Change in Polarity"),
  column.labels = c("Sentiment", "Total Tweets"),
  column.separate = c(2, 2),
  column.sep.width = "5pt",
  model.numbers = TRUE,
  digits = 2,
  add.lines = list(
    c("Cluster-Robust SEs", rep("\\checkmark", 4)),
    c("User Fixed Effects", rep("\\checkmark", 4)),
    # Distinct users per sample
    c("Users",
      formatC(
        rep(c(length(unique(sof.march21.ts.erdo$sender_id)),
              length(unique(sof.march21.ts.erdo.both$sender_id))), 2),
        format = "d", big.mark = ","
      )),
    # Sum of the total.erdo6 column per sample
    c("Tweets",
      formatC(
        rep(c(sum(sof.march21.ts.erdo$total.erdo6),
              sum(sof.march21.ts.erdo.both$total.erdo6)), 2),
        format = "d", big.mark = ","
      )),
    # Rows used by each fitted model
    c("Observations",
      formatC(
        c(nobs(mod2), nobs(mod4), nobs(mod5), nobs(mod6)),
        format = "d", big.mark = ","
      ))
  )
)

#### + Figure 5: Top Terms ####

# One panel per Period. Each word is drawn as a text label at its
# (Term, Proportion) position; geom_text_repel() moves overlapping labels
# apart, and the connector segments are made invisible.
p <- ggplot(data = d.tm, mapping = aes(x = Term, y = Proportion, label = word)) +
  geom_text_repel(segment.color = "transparent") +
  facet_grid(~Period) +
  labs(x = "Top Terms", y = "Proportion among Top Terms") +
  ylim(0, 0.3) +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white"))
p
